import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import spbn_classifier as spbn
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from skopt.space import Real, Categorical, Integer
from sklearn.tree import plot_tree
from sklearn.ensemble import GradientBoostingClassifier
# Global RNG seed so tuning / subsampling is reproducible across runs.
SEED = 12312548
np.random.seed(SEED)
# Variable groupings, per-wave feature selections and verbose (display) column
# names live as JSON files next to the dataset.
with open('../../Datos/var_sets.json') as f:
    f_vars = json.load(f)
with open('../../Datos/f_sel.json') as f:
    f_sel = json.load(f)
with open('../../Datos/nombres_verbose.json') as f:
    f_verbose = json.load(f)
# Main clinical dataset (one row per patient record snapshot).
df = pd.read_csv('../../Datos/FJD_v9-10_1_din.csv')
df.head()
| REGISTRO | Fingplan | Fecha_emision | Fumador | Cardio | Pulmonar | Diabetes | Renal | Neuro | Onco | ... | TADprimera | TADMin | TADMax | FCprimera | FCMin | FCMax | TempPrimera | TempMin | TempMax | TiempoIngreso | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | FJD_1 | 2020-03-22 17:31:12 | 2020-03-21 18:03:00 | Exfumador/a | No | No | Si | No | No | No | ... | 62.0 | 47.0 | 99.0 | 91.0 | 34.0 | 125.0 | 36.9 | 35.0 | 38.8 | 87.902627 |
| 1 | FJD_1 | 2020-03-22 17:31:12 | 2020-03-23 01:59:00 | Exfumador/a | No | No | Si | No | No | No | ... | 62.0 | 47.0 | 99.0 | 91.0 | 34.0 | 125.0 | 36.9 | 35.0 | 38.8 | 87.902627 |
| 2 | FJD_1 | 2020-03-22 17:31:12 | 2020-03-24 06:44:00 | Exfumador/a | No | No | Si | No | No | No | ... | 62.0 | 47.0 | 99.0 | 91.0 | 34.0 | 125.0 | 36.9 | 35.0 | 38.8 | 87.902627 |
| 3 | FJD_1 | 2020-03-22 17:31:12 | 2020-03-24 18:55:00 | Exfumador/a | No | No | Si | No | No | No | ... | 62.0 | 47.0 | 99.0 | 91.0 | 34.0 | 125.0 | 36.9 | 35.0 | 38.8 | 87.902627 |
| 4 | FJD_1 | 2020-03-22 17:31:12 | 2020-03-25 19:55:00 | Exfumador/a | No | No | Si | No | No | No | ... | 62.0 | 47.0 | 99.0 | 91.0 | 34.0 | 125.0 | 36.9 | 35.0 | 38.8 | 87.902627 |
5 rows × 88 columns
# Medication columns arrive as integers; go through str first so the category
# labels keep their original codes.
int_to_cat = [x for x in f_vars['meds_full'] if x in df.columns]
df = df.astype({x: 'str' for x in int_to_cat}).astype({x: 'category' for x in int_to_cat})
# Remaining integer columns are treated as continuous downstream.
to_float = df.columns[df.dtypes == 'int64']
df = df.astype({x: 'float64' for x in to_float})
# All other object columns become categoricals, except identifiers/dates/wave.
to_cat = [x for x in df.columns[df.dtypes == object] if x not in ["REGISTRO", "Fingplan", "Faltplan", "Fecha_emision", "Ola"]]
df = df.astype({x: 'category' for x in to_cat})
# Discretize length of stay into quintiles.
df['TiempoIngreso'] = pd.qcut(df['TiempoIngreso'], 5).astype(str).astype('category')
# Collapse to one row per patient: first record of each REGISTRO.
df = df.groupby('REGISTRO').aggregate('first')
# Rename columns to verbose English names; keep the inverse map for lookups.
translate = f_verbose['en']
inv_translate = {v:k for k, v in translate.items()}
df.rename(f_verbose['en'], inplace=True, axis=1, )
Conjuntos de train, test y validación
# Split using the precomputed esTest / esVal flags, then drop the flags so
# they never leak into the predictors.
df_train = df[~df['esTest'] & ~df['esVal']].drop(['esTest', 'esVal'], axis=1)
df_test = df[df['esTest'] & ~df['esVal']].drop(['esTest', 'esVal'], axis=1)
df_val = df[~df['esTest'] & df['esVal']].drop(['esTest', 'esVal'], axis=1)
from sklearn.metrics import roc_auc_score, confusion_matrix, ConfusionMatrixDisplay, RocCurveDisplay, brier_score_loss
from sklearn.calibration import CalibrationDisplay
from joblib import dump, load
from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, OrdinalEncoder, OneHotEncoder
import shap
import pprint
from IPython.display import HTML, display
import tabulate
class MyPipeline(Pipeline):
    """sklearn Pipeline that can expose its transforms separately from the
    final estimator.

    # https://stackoverflow.com/questions/33469633/how-to-transform-items-using-sklearn-pipeline
    """

    def just_transforms(self, X):
        """Run X through every step except the last one.

        Parameters
        ----------
        X : iterable
            Data to transform.  Must fulfill the input requirements of the
            first step of the pipeline.

        Returns
        -------
        tuple
            (final_estimator, transformed_X) — the untouched last step and
            the data after all preceding transforms.
        """
        transformed = X
        for _name, step in self.steps[:-1]:
            transformed = step.transform(transformed)
        final_estimator = self.steps[-1][1]
        return final_estimator, transformed
def dict_to_table(d, header=['', '']):
    """Render a dict as a two-column HTML table inline in the notebook."""
    rows = [[key, value] for key, value in d.items()]
    html = tabulate.tabulate(rows, tablefmt='html', headers=header)
    display(HTML(html))
def variables_olas(olas):
    """Return the translated (English) predictor names selected for the given
    waves.

    The selection comes from the GBM feature-selection results in f_sel,
    keyed by the dotted wave list (e.g. 'gbm_3.4.5').
    """
    olas_str = [str(x) for x in olas]
    variables = set()
    #variables = variables.union(set(f_sel['rf_' + '.'.join(olas_str)]['EXITUS']))
    variables = variables.union(set(f_sel['gbm_' + '.'.join(olas_str)]['EXITUS']))
    # BUG FIX: iterating a raw set is nondeterministic across interpreter runs
    # (hash randomization), so the predictor/column order — and any
    # order-sensitive downstream fit — varied despite the fixed SEED.
    # Sorting makes the returned order stable.
    return [translate[v] for v in sorted(variables)]
def seleccion_olas(df, olas):
    """Filter df to rows of the given waves, keeping only the selected
    predictors plus the EXITUS class column."""
    columns = variables_olas(olas) + [translate['EXITUS']]
    in_waves = df[translate['Ola']].isin(olas)
    return df[in_waves][columns]
def model_tune(model, tune_grid, X_train, y_train, scale=False, encode=None, n_iter=50, verbose=0):
    """Optionally wrap model in encode/scale preprocessing and Bayes-optimize
    its hyperparameters with 5-fold CV on ROC AUC.

    Returns (best_estimator, best_cv_score).
    """
    preprocessing = []
    if encode == 'ordinal':
        preprocessing.append(('encode', OrdinalEncoder()))
    elif encode == 'ohe':
        preprocessing.append(('encode', OneHotEncoder()))
    if scale:
        preprocessing.append(('scale', RobustScaler()))
    if preprocessing:
        # The search now runs over a pipeline, so hyperparameter names must
        # carry the 'model__' prefix to reach the final estimator.
        model = MyPipeline(preprocessing + [('model', model)])
        tune_grid = {'model__' + name: space for name, space in tune_grid.items()}
    search = BayesSearchCV(
        model, tune_grid, n_jobs=-1, cv=5, random_state=SEED,
        scoring='roc_auc', n_iter=n_iter, verbose=verbose,
    )
    search.fit(X_train, y_train)
    print(f'Tune result: {search.best_score_} auc.')
    return search.best_estimator_, search.best_score_
def model_report(model, X, y, id_persistencia=None):
    """Score a fitted model on (X, y) and gather the results in a report dict.

    Returns a dict with keys 'metrics' (roc, brier_loss, specificity,
    sensitivity, accuracy), 'model', 'y', 'feature_names', 'y_prob'
    (probability of the positive class) and 'y_pred'.  When id_persistencia
    is given, the report is persisted there with joblib.
    """
    out = {'metrics': {}, 'model': model, 'y': y}
    feature_names = list(X.columns)
    out['feature_names'] = feature_names
    # Pipelines are unwrapped up front: apply the transforms once, then
    # score with the bare final estimator.
    # (Was a fragile `str(type(model)) == str(MyPipeline)` comparison.)
    if isinstance(model, MyPipeline):
        model, X = model.just_transforms(X)
    metrics = out['metrics']
    out['y_prob'] = y_prob = model.predict_proba(X)[:, 1]
    metrics['roc'] = roc_auc_score(y, y_prob)
    # 'S' (Si / death) is the positive class label in this dataset.
    metrics['brier_loss'] = brier_score_loss(y, y_prob, pos_label="S")
    out['y_pred'] = y_pred = model.predict(X)
    tn, fp, fn, tp = confusion_matrix(y, y_pred).ravel()
    metrics['specificity'] = tn / (tn + fp)
    metrics['sensitivity'] = tp / (tp + fn)
    metrics['accuracy'] = (tp + tn) / (tp + tn + fp + fn)
    if id_persistencia:
        dump(out, id_persistencia)
    return out
def shap_plot(report, X_train, X_val, shap_explainer=None):
    """Compute SHAP values for the report's model and draw a summary plot.

    Stores the raw SHAP values in report['shap'].  Does nothing when
    shap_explainer is falsy.  NOTE(review): X_val is transformed but only
    X_train is explained/plotted — confirm that is intentional.
    """
    if not shap_explainer:
        return
    model = report['model']
    # Unwrap pipelines so the explainer sees the bare estimator and the
    # already-transformed data.
    if isinstance(model, MyPipeline):
        _, X_train = model.just_transforms(X_train)
        model, X_val = model.just_transforms(X_val)
    # Idiom fix: identity check on the class object instead of `==`.
    if shap_explainer is shap.TreeExplainer:
        # TreeExplainer's additivity check can fail for some ensembles.
        shap_values = shap_explainer(model, X_train, seed=SEED).shap_values(X_train, check_additivity=False)
    else:
        shap_values = shap_explainer(model, X_train, seed=SEED).shap_values(X_train)
    report['shap'] = shap_values
    _ = plt.figure(figsize=(15, 6))
    # Some explainers return one array per class; plot the positive class.
    # (Was `type(shap_values) == type([])`.)
    if isinstance(shap_values, list):
        shap_values = shap_values[1]
    shap.summary_plot(shap_values, X_train, report['feature_names'], show=False, plot_size=None)
    plt.show()
def plot_report(report, model_repr=None):
    """Plot ROC curve, calibration curve and confusion matrix for a report,
    then tabulate its metrics and the model hyperparameters.

    model_repr, when given, is called with (estimator, feature_names) to
    render a model-specific view (coefficients, tree, network, ...).
    """
    fig, axs = plt.subplot_mosaic([['left', 'right'], ['bottom', 'bottom']],
                                  figsize=(10, 10), constrained_layout=True)
    RocCurveDisplay.from_predictions(report['y'], report['y_prob'], pos_label='S', ax=axs['left'])
    axs['left'].set_xticks(np.arange(0, 1, step=0.1))
    axs['left'].set_yticks(np.arange(0, 1, step=0.1))
    CalibrationDisplay.from_predictions(report['y'], report['y_prob'], pos_label='S', ax=axs['right'])
    axs['right'].set_xticks(np.arange(0, 1, step=0.1))
    axs['right'].set_yticks(np.arange(0, 1, step=0.1))
    ConfusionMatrixDisplay.from_predictions(report['y'], report['y_pred'], ax=axs['bottom'])
    plt.show()
    dict_to_table(report['metrics'], ['Metric', 'Value'])
    model = report['model']
    if model_repr:
        # Unwrap pipelines so model_repr sees the bare estimator.
        # (Was a fragile `str(type(model)) == str(MyPipeline)` comparison.)
        if isinstance(model, MyPipeline):
            model = model.steps[-1][1]
        model_repr(model, report['feature_names'])
    dict_to_table(model.get_params(), ['Hyperparameter', 'Value'])
def generar_id(modelo, olas, disc=False, es_test=False):
    """Build the joblib persistence path for a tuned model.

    Returns None when modelo is None (i.e. persistence disabled).
    """
    if modelo is None:
        return None
    olas_tag = '_'.join(str(ola) for ola in olas)
    suffix = ('_disc' if disc else '') + ('_test' if es_test else '')
    return f'modelosPersistidos/{modelo}{olas_tag}{suffix}.joblib'
def model_demonstrate(olas, model, tune_grid, df_train, df_val, scale=False, encode=None,
                      shap_explainer=None, model_repr=None, class_name='EXITUS', nombre_modelo=None,
                      n_iter=50, verbose=0, es_test=False):
    """Full cycle for one model on the given waves: select features, tune,
    evaluate on validation, plot SHAP and report figures.

    Returns (report, (X_train, y_train, X_val, y_val)).
    """
    df_train_ola = seleccion_olas(df_train, olas)
    df_val_ola = seleccion_olas(df_val, olas)
    class_name = translate[class_name]
    predictors_ola = [x for x in df_train_ola.columns if x != class_name]
    X_train, y_train = df_train_ola[predictors_ola], df_train_ola[class_name]
    X_val, y_val = df_val_ola[predictors_ola], df_val_ola[class_name]
    if nombre_modelo is None:
        nombre_modelo = model.__class__.__name__
    if n_iter > 0:
        print(f'Tuning params {", ".join(list(tune_grid.keys()))}')
        # BUG FIX: `encode` was accepted but never forwarded, so a requested
        # categorical encoding was silently skipped (only the '_disc' tag in
        # the persisted filename reflected it).
        tuned_model, train_score = model_tune(model, tune_grid, X_train, y_train, scale=scale,
                                              encode=encode, n_iter=n_iter, verbose=verbose)
    else:
        # n_iter == 0 means no hyperparameter search: just fit as-is.
        tuned_model, train_score = model.fit(X_train, y_train), None
    id_modelo = generar_id(nombre_modelo, olas, disc=(encode is not None), es_test=es_test)
    report = model_report(
        tuned_model, X_val, y_val, id_persistencia=id_modelo)
    shap_plot(report, X_train, X_val, shap_explainer)
    report['train_score'] = train_score
    plot_report(report, model_repr=model_repr)
    return report, (X_train, y_train, X_val, y_val)
def lr_repr(lr, feature_names):
    """Tabulate logistic-regression coefficients, largest magnitude first."""
    weights = {name: lr.coef_[0][idx] for idx, name in enumerate(feature_names)}
    by_magnitude = sorted(weights.items(), key=lambda kv: abs(kv[1]), reverse=True)
    dict_to_table(dict(by_magnitude), ['Feature', 'Weight'])
# Elastic-net logistic regression, tuned over C / l1_ratio / class weighting.
model = LogisticRegression(penalty='elasticnet', solver='saga', n_jobs=-1)
search_space = {
    'C': Real(0.1, 100, prior='log-uniform'),
    'l1_ratio': Real(0, 1, prior='uniform'),
    'class_weight': Categorical(['balanced', None])
}
# Wave 1 (scaled inputs; linear SHAP explainer; coefficient table).
report, _ = model_demonstrate([1], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.9028736451430929 auc.
| Metric | Value |
|---|---|
| roc | 0.872247 |
| brier_loss | 0.130027 |
| specificity | 0.902439 |
| sensitivity | 0.6 |
| accuracy | 0.808989 |
| Feature | Weight |
|---|---|
| Age | 1.29664 |
| Oxygen saturation (daily maximum) | -0.641471 |
| Body temperaure (daily maximum) | 0.554038 |
| Lactate dehydrogenase (LDH) | 0.470849 |
| Heart rate (daily maximum) | 0.416539 |
| Red Cell Blood Distribution Width (RDW) | 0.403401 |
| Mean corpuscular volume | 0.35603 |
| Mean corpuscular hemoglobin concentration (MCHC) | -0.25308 |
| Glucose | 0.251309 |
| Albumin | -0.249037 |
| Blood urea nitrogen (BUN) | 0.239575 |
| Platelets | -0.228973 |
| Calcium | -0.189948 |
| Lymphocyte % | -0.185796 |
| Hemoglobin | -0.171281 |
| Diastolic blood pressure (daily maximum) | -0.15559 |
| Partial pressure of oxygen (Blood gas test) | 0.153068 |
| Heart rate (daily minimum) | 0.143473 |
| Oxygen saturation (daily minimum) | -0.137545 |
| Ferritin | 0.0892244 |
| Urea | 0.06244 |
| Systolic blood pressure (daily maximum) | 0.0406559 |
| Body temperature (daily minimum) | 0.0404112 |
| International normalized ratio (INR) | 0.0374522 |
| Lymphocyte count | -0.0227599 |
| Eosinophil % | -0.016723 |
| Red blood cells | -0.0115366 |
| Prothrombin time (PT) | 0.000732271 |
| Diastolic blood pressure (daily minimum) | 0 |
| Hematocrit | 0 |
| Hyperparameter | Value |
|---|---|
| C | 0.27166552334797633 |
| class_weight | |
| dual | False |
| fit_intercept | True |
| intercept_scaling | 1 |
| l1_ratio | 0.7965934415128313 |
| max_iter | 100 |
| multi_class | auto |
| n_jobs | -1 |
| penalty | elasticnet |
| random_state | |
| solver | saga |
| tol | 0.0001 |
| verbose | 0 |
| warm_start | False |
0.9028736451430929
# Same LR setup, wave 2.
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.8778755141912032 auc.
| Metric | Value |
|---|---|
| roc | 0.876304 |
| brier_loss | 0.0917259 |
| specificity | 0.966216 |
| sensitivity | 0.350877 |
| accuracy | 0.866856 |
| Feature | Weight |
|---|---|
| Age | 1.52897 |
| Red Cell Blood Distribution Width (RDW) | 0.411721 |
| Heart rate (daily maximum) | 0.368521 |
| Body temperaure (daily maximum) | 0.360037 |
| Estimated glomerular filtration rate (eGFR) ckd-epi | -0.313 |
| Systolic blood pressure (first measure) | -0.312372 |
| Mean corpuscular volume | 0.309369 |
| Partial pressure of oxygen (Blood gas test) | 0.237908 |
| Lactate dehydrogenase (LDH) | 0.215156 |
| Partial pressure of CO2 (Blood gas test) | -0.214141 |
| Diastolic blood pressure (daily maximum) | 0.21212 |
| Body temperature (daily minimum) | 0.205722 |
| Platelets | -0.1838 |
| Total CO2 (blood gas test) | 0.175816 |
| Albumin | -0.15411 |
| Oxygen saturation (daily maximum) | -0.153838 |
| Blood urea nitrogen (BUN) | 0.140116 |
| Hematocrit | -0.130762 |
| Lymphocyte % | -0.117527 |
| Glucose | 0.115247 |
| Systolic blood pressure (daily maximum) | 0.0842292 |
| Urea | 0.066126 |
| Oxygen saturation (daily minimum) | -0.0605084 |
| Monocytes % | -0.0583326 |
| C-reactive protein | 0.0581361 |
| Current bicarbonate (blood gas test) | -0.0377462 |
| International normalized ratio (INR) | 0.0169834 |
| D-Dimer | 0.0143832 |
| Segmented neutrophils % | -0.0042528 |
| Hyperparameter | Value |
|---|---|
| C | 2.3396397269905416 |
| class_weight | |
| dual | False |
| fit_intercept | True |
| intercept_scaling | 1 |
| l1_ratio | 1.0 |
| max_iter | 100 |
| multi_class | auto |
| n_jobs | -1 |
| penalty | elasticnet |
| random_state | |
| solver | saga |
| tol | 0.0001 |
| verbose | 0 |
| warm_start | False |
0.8778755141912032
# Same LR setup, waves 3-5 pooled.
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.8480042952774796 auc.
| Metric | Value |
|---|---|
| roc | 0.824806 |
| brier_loss | 0.172728 |
| specificity | 0.730233 |
| sensitivity | 0.740741 |
| accuracy | 0.731405 |
| Feature | Weight |
|---|---|
| Age | 1.52859 |
| Body temperaure (daily maximum) | 0.571957 |
| Red Cell Blood Distribution Width (RDW) | 0.325664 |
| Lymphocyte % | -0.280928 |
| Blood urea nitrogen (BUN) | 0.242536 |
| Lactate dehydrogenase (LDH) | 0.210772 |
| Heart rate (daily maximum) | 0.166012 |
| Albumin | -0.131302 |
| Calcium | -0.130377 |
| Heart rate (daily minimum) | 0.119985 |
| Oxygen saturation (daily minimum) | -0.106787 |
| Partial Thromboplastin Time ratio | 0.0791531 |
| D-Dimer | 0.0576698 |
| Lymphocyte count | 0.0540325 |
| Heart rate (first measure) | 0.0468054 |
| Activated Partial Thromboplastin Time (aPTT) | 0.0104338 |
| Monocytes % | 0 |
| Current bicarbonate (blood gas test) | 0 |
| Hyperparameter | Value |
|---|---|
| C | 0.1 |
| class_weight | balanced |
| dual | False |
| fit_intercept | True |
| intercept_scaling | 1 |
| l1_ratio | 0.5009484933418734 |
| max_iter | 100 |
| multi_class | auto |
| n_jobs | -1 |
| penalty | elasticnet |
| random_state | |
| solver | saga |
| tol | 0.0001 |
| verbose | 0 |
| warm_start | False |
0.8480042952774796
# Same LR setup, wave 6.
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=True, shap_explainer=shap.LinearExplainer, model_repr=lr_repr)
print(report['train_score'])
Tuning params C, l1_ratio, class_weight
The max_iter was reached which means the coef_ did not converge
Tune result: 0.8114693400167085 auc.
| Metric | Value |
|---|---|
| roc | 0.827214 |
| brier_loss | 0.0975671 |
| specificity | 0.961538 |
| sensitivity | 0.136364 |
| accuracy | 0.859551 |
| Feature | Weight |
|---|---|
| Age | 1.37553 |
| Heart rate (daily maximum) | 0.491425 |
| Hematocrit | -0.299552 |
| Systolic blood pressure (daily minimum) | 0.293022 |
| Systolic blood pressure (first measure) | -0.275652 |
| Lactate dehydrogenase (LDH) | 0.26678 |
| Mean corpuscular volume | 0.256462 |
| Systolic blood pressure (daily maximum) | -0.248719 |
| Glucose | 0.238793 |
| C-reactive protein | 0.190043 |
| Urea | 0.182359 |
| Oxygen saturation (daily maximum) | -0.167072 |
| Partial Thromboplastin Time ratio | 0.161217 |
| Hemolysis index | 0.15631 |
| Derived fibrinogen | 0.150584 |
| Hemoglobin | -0.143416 |
| Mean corpuscular hemoglobin concentration (MCHC) | -0.138783 |
| Monocytes % | -0.138 |
| Creatinine | 0.115695 |
| Segmented neutrophils % | 0.0935264 |
| Oxygen saturation (first measure) | 0.0856922 |
| Oxygen saturation (daily minimum) | -0.085041 |
| Heart rate (first measure) | -0.0607562 |
| D-Dimer | -0.0336893 |
| Blood urea nitrogen (BUN) | 0.0242479 |
| Prothrombin time (PT) | -0.0242128 |
| Eosinophil % | -0.00892222 |
| Lymphocyte % | 0.00803419 |
| Activated Partial Thromboplastin Time (aPTT) | 0 |
| Hyperparameter | Value |
|---|---|
| C | 0.6576313755130081 |
| class_weight | |
| dual | False |
| fit_intercept | True |
| intercept_scaling | 1 |
| l1_ratio | 0.27956404472462143 |
| max_iter | 100 |
| multi_class | auto |
| n_jobs | -1 |
| penalty | elasticnet |
| random_state | |
| solver | saga |
| tol | 0.0001 |
| verbose | 0 |
| warm_start | False |
0.8114693400167085
# Decision tree with a broad structural/pruning search space.
model = DecisionTreeClassifier(random_state=SEED, )
search_space = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'max_depth': Integer(1, 8),
    'min_samples_split': Integer(2, 15),
    'min_samples_leaf': Integer(1, 10),
    'ccp_alpha': Real(0, 0.035),
    'class_weight': Categorical(['balanced', None]),
    'min_impurity_decrease': Real(0, 0.1),
    'max_features': Categorical(['sqrt', 'log2', None])
}
def dt_repr(dt, feature_names=None, figsize=(20, 10), fontsize=7, max_depth=3):
    """Render the top `max_depth` levels of a fitted decision tree.

    Falls back to the tree's own feature_names_in_ when feature_names is None.
    """
    names = dt.feature_names_in_ if feature_names is None else feature_names
    plt.figure(figsize=figsize)
    plot_tree(
        dt,
        max_depth=max_depth,
        feature_names=names,
        label='none',
        class_names=['Survive', 'Death'],
        filled=True,
        rounded=True,
        proportion=True,
        impurity=False,
        precision=2,
        fontsize=fontsize,
    )
    plt.show()
# Decision tree, wave 1 (no scaling needed for trees).
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features
The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before.
Tune result: 0.8486132080358292 auc.
| Metric | Value |
|---|---|
| roc | 0.80109 |
| brier_loss | 0.174041 |
| specificity | 0.817073 |
| sensitivity | 0.718182 |
| accuracy | 0.786517 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| class_weight | balanced |
| criterion | gini |
| max_depth | 6 |
| max_features | |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 10 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| random_state | 12312548 |
| splitter | best |
0.8486132080358292
# Decision tree, wave 2.
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features Tune result: 0.8015639406033493 auc.
| Metric | Value |
|---|---|
| roc | 0.796408 |
| brier_loss | 0.174694 |
| specificity | 0.574324 |
| sensitivity | 0.894737 |
| accuracy | 0.626062 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.035 |
| class_weight | balanced |
| criterion | entropy |
| max_depth | 8 |
| max_features | |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 7 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| random_state | 12312548 |
| splitter | best |
0.8015639406033493
# Decision tree, waves 3-5 pooled.
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features Tune result: 0.8242380451918023 auc.
| Metric | Value |
|---|---|
| roc | 0.796985 |
| brier_loss | 0.189001 |
| specificity | 0.506977 |
| sensitivity | 1 |
| accuracy | 0.561983 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| class_weight | balanced |
| criterion | log_loss |
| max_depth | 7 |
| max_features | |
| max_leaf_nodes | |
| min_impurity_decrease | 0.029163250328520157 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| random_state | 12312548 |
| splitter | best |
0.8242380451918023
# Decision tree, wave 6.
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=dt_repr)
print(report['train_score'])
Tuning params criterion, max_depth, min_samples_split, min_samples_leaf, ccp_alpha, class_weight, min_impurity_decrease, max_features Tune result: 0.7657213403880071 auc.
| Metric | Value |
|---|---|
| roc | 0.783362 |
| brier_loss | 0.187843 |
| specificity | 0.794872 |
| sensitivity | 0.727273 |
| accuracy | 0.786517 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.02649888916025202 |
| class_weight | balanced |
| criterion | entropy |
| max_depth | 4 |
| max_features | |
| max_leaf_nodes | |
| min_impurity_decrease | 0.009205638824541569 |
| min_samples_leaf | 6 |
| min_samples_split | 15 |
| min_weight_fraction_leaf | 0.0 |
| random_state | 12312548 |
| splitter | best |
0.7657213403880071
# Gradient boosting with early stopping via n_iter_no_change.
model = GradientBoostingClassifier(random_state=SEED)
search_space = {
    'n_estimators': Integer(5, 150),
    'learning_rate': Real(0.05, 0.2),
    'max_features': Categorical(['log2', None]),
    'n_iter_no_change': Integer(1, 10),
}
# GBM, wave 1.
report, data = model_demonstrate([1], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change Tune result: 0.9155530287710902 auc.
| Metric | Value |
|---|---|
| roc | 0.88289 |
| brier_loss | 0.123538 |
| specificity | 0.918699 |
| sensitivity | 0.609091 |
| accuracy | 0.823034 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| criterion | friedman_mse |
| init | |
| learning_rate | 0.05 |
| loss | log_loss |
| max_depth | 3 |
| max_features | log2 |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| n_estimators | 150 |
| n_iter_no_change | 10 |
| random_state | 12312548 |
| subsample | 1.0 |
| tol | 0.0001 |
| validation_fraction | 0.1 |
| verbose | 0 |
| warm_start | False |
0.9155530287710902
# GBM, wave 2.
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change Tune result: 0.8710921804757454 auc.
| Metric | Value |
|---|---|
| roc | 0.884068 |
| brier_loss | 0.0925896 |
| specificity | 0.976351 |
| sensitivity | 0.280702 |
| accuracy | 0.864023 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| criterion | friedman_mse |
| init | |
| learning_rate | 0.18859591001753956 |
| loss | log_loss |
| max_depth | 3 |
| max_features | log2 |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| n_estimators | 64 |
| n_iter_no_change | 6 |
| random_state | 12312548 |
| subsample | 1.0 |
| tol | 0.0001 |
| validation_fraction | 0.1 |
| verbose | 0 |
| warm_start | False |
0.8710921804757454
# GBM, waves 3-5 pooled.
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change Tune result: 0.8652028594093621 auc.
| Metric | Value |
|---|---|
| roc | 0.891473 |
| brier_loss | 0.0747619 |
| specificity | 0.981395 |
| sensitivity | 0.148148 |
| accuracy | 0.88843 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| criterion | friedman_mse |
| init | |
| learning_rate | 0.08410300395207498 |
| loss | log_loss |
| max_depth | 3 |
| max_features | log2 |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| n_estimators | 100 |
| n_iter_no_change | 8 |
| random_state | 12312548 |
| subsample | 1.0 |
| tol | 0.0001 |
| validation_fraction | 0.1 |
| verbose | 0 |
| warm_start | False |
0.8652028594093621
# GBM, wave 6.
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=shap.TreeExplainer, model_repr=None)
print(report['train_score'])
Tuning params n_estimators, learning_rate, max_features, n_iter_no_change
The objective has been evaluated at this point before.
Tune result: 0.8310113246078158 auc.
| Metric | Value |
|---|---|
| roc | 0.818036 |
| brier_loss | 0.0980626 |
| specificity | 0.99359 |
| sensitivity | 0.0909091 |
| accuracy | 0.882022 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| criterion | friedman_mse |
| init | |
| learning_rate | 0.06464321510220243 |
| loss | log_loss |
| max_depth | 3 |
| max_features | log2 |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| n_estimators | 107 |
| n_iter_no_change | 3 |
| random_state | 12312548 |
| subsample | 1.0 |
| tol | 0.0001 |
| validation_fraction | 0.1 |
| verbose | 0 |
| warm_start | False |
0.8310113246078158
def bn_repr(bn, feature_names):
    """Render a fitted Bayesian-network classifier's structure.

    Passed as the ``model_repr`` callback to ``model_demonstrate``.
    ``feature_names`` is part of the callback signature but is not needed
    here, since ``bn.display()`` draws the network on its own.
    """
    bn.display()
# Naive Bayes structure (k=0 extra parents) with conditional linear
# Gaussian CPDs; shared notebook seed for reproducibility, all cores used.
model = spbn.KDBBNClassifierCLG(k=0, random_state=SEED, n_jobs=-1)
nombre_modelo = 'Naive-Bayes-CLG'

# Hyperparameter space for the tuner: mutual-information threshold and
# neighbour count of the MI estimator.
search_space = dict(
    mi_thres=Categorical([0, 0.1]),
    mi_nneighbors=Categorical([5, 10, 50, 100, 200]),
)

# Tune, fit and report on variable set [1]. BN models are not scaled and
# get no SHAP explainer; bn_repr draws the learned network structure.
report, data = model_demonstrate(
    [1], model, search_space, df_train, df_val,
    scale=False, shap_explainer=None, model_repr=bn_repr,
    nombre_modelo=nombre_modelo, n_iter=15, verbose=1,
)
# Best cross-validated tuning AUC.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8692631197044305 auc.
| Metric | Value |
|---|---|
| roc | 0.839135 |
| brier_loss | 0.202066 |
| specificity | 0.878049 |
| sensitivity | 0.554545 |
| accuracy | 0.77809 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 200 |
| mi_thres | 0.0 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8692631197044305
# Repeat the Naive-Bayes-CLG demonstration on variable set [2], reusing the
# model instance and search space defined above.
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8439311399450586 auc.
| Metric | Value |
|---|---|
| roc | 0.847202 |
| brier_loss | 0.149663 |
| specificity | 0.89527 |
| sensitivity | 0.491228 |
| accuracy | 0.830028 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8439311399450586
# Naive-Bayes-CLG on the combined variable sets [3, 4, 5].
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8295335925872991 auc.
| Metric | Value |
|---|---|
| roc | 0.822394 |
| brier_loss | 0.120831 |
| specificity | 0.925581 |
| sensitivity | 0.37037 |
| accuracy | 0.863636 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8295335925872991
# Naive-Bayes-CLG on variable set [6].
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.7908185278009839 auc.
| Metric | Value |
|---|---|
| roc | 0.746503 |
| brier_loss | 0.166789 |
| specificity | 0.878205 |
| sensitivity | 0.227273 |
| accuracy | 0.797753 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 10 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.7908185278009839
# Chow-Liu tree-augmented Naive Bayes (TAN) with conditional linear
# Gaussian CPDs; same seed and full parallelism as the other BN models.
model = spbn.CLTANBNClassifierCLG(random_state=SEED, n_jobs=-1)
nombre_modelo = 'Chow-Liu-TAN-CLG'

# Tuner space: mutual-information threshold and MI-estimator neighbours.
search_space = dict(
    mi_thres=Categorical([0, 0.1]),
    mi_nneighbors=Categorical([5, 10, 50, 100, 200]),
)

# Tune, fit and report on variable set [1]; bn_repr renders the structure.
report, data = model_demonstrate(
    [1], model, search_space, df_train, df_val,
    scale=False, shap_explainer=None, model_repr=bn_repr,
    nombre_modelo=nombre_modelo, n_iter=15, verbose=1,
)
# Best cross-validated tuning AUC.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8645437091158602 auc.
| Metric | Value |
|---|---|
| roc | 0.841537 |
| brier_loss | 0.195423 |
| specificity | 0.878049 |
| sensitivity | 0.554545 |
| accuracy | 0.77809 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8645437091158602
# Chow-Liu-TAN-CLG on variable set [2], reusing the model and search space above.
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8267598351885397 auc.
| Metric | Value |
|---|---|
| roc | 0.845869 |
| brier_loss | 0.152701 |
| specificity | 0.885135 |
| sensitivity | 0.508772 |
| accuracy | 0.824363 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 200 |
| mi_thres | 0.0 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8267598351885397
# Chow-Liu-TAN-CLG on the combined variable sets [3, 4, 5].
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.7858673734782025 auc.
| Metric | Value |
|---|---|
| roc | 0.799655 |
| brier_loss | 0.13268 |
| specificity | 0.92093 |
| sensitivity | 0.333333 |
| accuracy | 0.855372 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 5 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.7858673734782025
# Chow-Liu-TAN-CLG on variable set [6].
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.707173173674928 auc.
| Metric | Value |
|---|---|
| roc | 0.730478 |
| brier_loss | 0.190851 |
| specificity | 0.858974 |
| sensitivity | 0.227273 |
| accuracy | 0.780899 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 100 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.707173173674928
# k-dependence Bayesian classifier with up to k=2 extra parents per
# feature, conditional linear Gaussian CPDs; shared seed, all cores.
model = spbn.KDBBNClassifierCLG(k=2, random_state=SEED, n_jobs=-1)
nombre_modelo = '2DB-BNC-CLG'

# Tuner space: mutual-information threshold and MI-estimator neighbours.
search_space = dict(
    mi_thres=Categorical([0, 0.1]),
    mi_nneighbors=Categorical([5, 10, 50, 100, 200]),
)

# Tune, fit and report on variable set [1]; bn_repr renders the structure.
report, data = model_demonstrate(
    [1], model, search_space, df_train, df_val,
    scale=False, shap_explainer=None, model_repr=bn_repr,
    nombre_modelo=nombre_modelo, n_iter=15, verbose=1,
)
# Best cross-validated tuning AUC.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8590092589799312 auc.
| Metric | Value |
|---|---|
| roc | 0.838452 |
| brier_loss | 0.199964 |
| specificity | 0.886179 |
| sensitivity | 0.518182 |
| accuracy | 0.772472 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 2 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 200 |
| mi_thres | 0.0 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8590092589799312
# 2DB-BNC-CLG on variable set [2], reusing the model and search space above.
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8350205181195418 auc.
| Metric | Value |
|---|---|
| roc | 0.844832 |
| brier_loss | 0.143931 |
| specificity | 0.898649 |
| sensitivity | 0.473684 |
| accuracy | 0.830028 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 2 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8350205181195418
# 2DB-BNC-CLG on the combined variable sets [3, 4, 5].
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8100308596877793 auc.
| Metric | Value |
|---|---|
| roc | 0.811542 |
| brier_loss | 0.127926 |
| specificity | 0.925581 |
| sensitivity | 0.333333 |
| accuracy | 0.859504 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 2 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8100308596877793
# 2DB-BNC-CLG on variable set [6].
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.7798885361552028 auc.
| Metric | Value |
|---|---|
| roc | 0.764277 |
| brier_loss | 0.146185 |
| specificity | 0.916667 |
| sensitivity | 0.181818 |
| accuracy | 0.825843 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 2 |
| max_indegree | 0 |
| max_iters | 0 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.7798885361552028
# Unconstrained hill-climbing structure learner with conditional linear
# Gaussian CPDs, scored by BIC; shared notebook seed, all cores.
model = spbn.HCClassifierCLG(random_state=SEED, n_jobs=-1, bn_score='bic')
nombre_modelo = 'Unconstrained-BNC-CLG'

# Tuner space: hill-climbing stopping tolerance (log-uniform over six
# orders of magnitude) and the maximum number of parents per node.
search_space = dict(
    epsilon=Real(1e-6, 1, prior='log-uniform', base=20),
    max_indegree=Integer(0, 5),
)

# Tune, fit and report on variable set [1]; 50 tuning iterations here
# since this search space is larger than the MI-only ones.
report, data = model_demonstrate(
    [1], model, search_space, df_train, df_val,
    scale=False, shap_explainer=None, model_repr=bn_repr,
    nombre_modelo=nombre_modelo, n_iter=50, verbose=0,
)
# Best cross-validated tuning AUC.
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before.
Tune result: 0.8530678709129216 auc.
| Metric | Value |
|---|---|
| roc | 0.838101 |
| brier_loss | 0.188457 |
| specificity | 0.865854 |
| sensitivity | 0.581818 |
| accuracy | 0.77809 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 1.0000000000000002e-06 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 1 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8530678709129216
# Unconstrained-BNC-CLG on variable set [2], reusing the model and
# (epsilon, max_indegree) search space above.
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=50, verbose=0)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before.
Tune result: 0.8276863756662045 auc.
| Metric | Value |
|---|---|
| roc | 0.815226 |
| brier_loss | 0.159931 |
| specificity | 0.902027 |
| sensitivity | 0.403509 |
| accuracy | 0.82153 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 1.0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 2 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8276863756662045
# Unconstrained-BNC-CLG on the combined variable sets [3, 4, 5].
report, _ = model_demonstrate([3,4,5], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=50, verbose=0)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before.
Tune result: 0.8148766489184498 auc.
| Metric | Value |
|---|---|
| roc | 0.749182 |
| brier_loss | 0.126782 |
| specificity | 0.939535 |
| sensitivity | 0.259259 |
| accuracy | 0.863636 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0.016566792669357525 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 4 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8148766489184498
# Unconstrained-BNC-CLG on variable set [6].
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, scale=False, shap_explainer=None,
    model_repr=bn_repr, nombre_modelo=nombre_modelo, n_iter=50, verbose=0)
# Best cross-validated tuning AUC for this run.
print(report['train_score'])
Tuning params epsilon, max_indegree
The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before. The objective has been evaluated at this point before.
Tune result: 0.725493919985148 auc.
| Metric | Value |
|---|---|
| roc | 0.660548 |
| brier_loss | 0.160867 |
| specificity | 0.923077 |
| sensitivity | 0.0909091 |
| accuracy | 0.820225 |
| Hyperparameter | Value |
|---|---|
| bn_score | bic |
| epsilon | 0.0002904553940801573 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 1 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.725493919985148
# Naive Bayes structure (k=0) with semiparametric (SP) CPDs, scored by
# held-out likelihood; iteration cap keeps node-type search bounded.
model = spbn.KDBBNClassifierSP(k=0, random_state=SEED, n_jobs=-1, bn_score='holdout-lik', max_iters=1000)
nombre_modelo = 'Naive-Bayes-SP'

# Tuner space: MI threshold, MI-estimator neighbours, and the kernel
# bandwidth selector for the nonparametric CPDs.
search_space = dict(
    mi_thres=Categorical([0, 0.1]),
    mi_nneighbors=Categorical([5, 10, 50, 100]),
    bw_sel=Categorical(['scott', 'ucv']),
)

# Tune, fit and report on variable set [1]; bn_repr renders the structure.
report, data = model_demonstrate(
    [1], model, search_space, df_train, df_val,
    scale=False, shap_explainer=None, model_repr=bn_repr,
    nombre_modelo=nombre_modelo, n_iter=15, verbose=1,
)
# Best cross-validated tuning AUC.
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8938889335377386 auc.
| Metric | Value |
|---|---|
| roc | 0.863193 |
| brier_loss | 0.17123 |
| specificity | 0.886179 |
| sensitivity | 0.590909 |
| accuracy | 0.794944 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 50 |
| mi_thres | 0.0 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('node_type',) |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8938889335377386
# Same Naive-Bayes-SP pipeline on group [2]; only the report is kept.
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8522595499399539 auc.
| Metric | Value |
|---|---|
| roc | 0.856271 |
| brier_loss | 0.132122 |
| specificity | 0.89527 |
| sensitivity | 0.491228 |
| accuracy | 0.830028 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('node_type',) |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8522595499399539
# Naive-Bayes-SP on the pooled groups [3, 4, 5].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, data = model_demonstrate([3, 4, 5], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.848859498625713 auc.
| Metric | Value |
|---|---|
| roc | 0.823256 |
| brier_loss | 0.0990414 |
| specificity | 0.944186 |
| sensitivity | 0.222222 |
| accuracy | 0.863636 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('node_type',) |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.848859498625713
# Naive-Bayes-SP on group [6].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, data = model_demonstrate([6], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.7896872551749745 auc.
| Metric | Value |
|---|---|
| roc | 0.797786 |
| brier_loss | 0.119946 |
| specificity | 0.948718 |
| sensitivity | 0.272727 |
| accuracy | 0.865169 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 0 |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('node_type',) |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.7896872551749745
# Chow-Liu tree-augmented Naive Bayes (TAN) with semiparametric CPDs.
model = spbn.CLTANBNClassifierSP(random_state=SEED, n_jobs=-1,
                                 bn_score='holdout-lik', max_iters=1000)
nombre_modelo = 'Chow-Liu-TAN-SP'
# Same tuning dimensions as the Naive Bayes variant.
search_space = dict(
    mi_thres=Categorical([0, 0.1]),
    mi_nneighbors=Categorical([5, 10, 50, 100]),
    bw_sel=Categorical(['scott', 'ucv']),
)
# Chow-Liu-TAN-SP on group [1].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, data = model_demonstrate([1], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8809358187449435 auc.
| Metric | Value |
|---|---|
| roc | 0.85765 |
| brier_loss | 0.171022 |
| specificity | 0.902439 |
| sensitivity | 0.563636 |
| accuracy | 0.797753 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 10 |
| mi_thres | 0.0 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | node_type |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8809358187449435
# Chow-Liu-TAN-SP on group [2].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8347171440386129 auc.
| Metric | Value |
|---|---|
| roc | 0.837038 |
| brier_loss | 0.16004 |
| specificity | 0.885135 |
| sensitivity | 0.45614 |
| accuracy | 0.815864 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | ucv |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | node_type |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8347171440386129
# Chow-Liu-TAN-SP on the pooled groups [3, 4, 5].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([3, 4, 5], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.7836899802185575 auc.
| Metric | Value |
|---|---|
| roc | 0.780879 |
| brier_loss | 0.111389 |
| specificity | 0.95814 |
| sensitivity | 0.222222 |
| accuracy | 0.876033 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | node_type |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.7836899802185575
# Chow-Liu-TAN-SP on group [6].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.7136411027568923 auc.
| Metric | Value |
|---|---|
| roc | 0.741841 |
| brier_loss | 0.153588 |
| specificity | 0.916667 |
| sensitivity | 0.227273 |
| accuracy | 0.831461 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 0 |
| max_iters | 1000 |
| mi_nneighbors | 100 |
| mi_thres | 0.0 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | node_type |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.7136411027568923
# 2-dependence Bayesian classifier: KDB with up to k=2 feature parents.
# max_iters is halved vs. the NB run — the richer structure is costlier to learn.
model = spbn.KDBBNClassifierSP(k=2, random_state=SEED, n_jobs=-1,
                               bn_score='holdout-lik', max_iters=500)
nombre_modelo = '2DB-BNC-SP'
# Note: this space swaps 'ucv' for 'normal_reference' bandwidth selection.
search_space = dict(
    mi_thres=Categorical([0, 0.1]),
    mi_nneighbors=Categorical([5, 10, 50, 100]),
    bw_sel=Categorical(['scott', 'normal_reference']),
)
# 2DB-BNC-SP on group [1].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, data = model_demonstrate([1], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8759334490170628 auc.
| Metric | Value |
|---|---|
| roc | 0.827384 |
| brier_loss | 0.196265 |
| specificity | 0.890244 |
| sensitivity | 0.527273 |
| accuracy | 0.77809 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | normal_reference |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 2 |
| max_indegree | 0 |
| max_iters | 500 |
| mi_nneighbors | 5 |
| mi_thres | 0.0 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('node_type',) |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8759334490170628
# 2DB-BNC-SP on group [2].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.825951000547996 auc.
| Metric | Value |
|---|---|
| roc | 0.848032 |
| brier_loss | 0.122064 |
| specificity | 0.945946 |
| sensitivity | 0.368421 |
| accuracy | 0.852691 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 2 |
| max_indegree | 0 |
| max_iters | 500 |
| mi_nneighbors | 100 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('node_type',) |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.825951000547996
# 2DB-BNC-SP on the pooled groups [3, 4, 5]. (No group-[6] run for this model.)
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([3, 4, 5], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params mi_thres, mi_nneighbors, bw_sel Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8281660801661153 auc.
| Metric | Value |
|---|---|
| roc | 0.821189 |
| brier_loss | 0.105746 |
| specificity | 0.95814 |
| sensitivity | 0.222222 |
| accuracy | 0.876033 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| k | 2 |
| max_indegree | 0 |
| max_iters | 500 |
| mi_nneighbors | 50 |
| mi_thres | 0.1 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('node_type',) |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8281660801661153
# Unconstrained hill-climbing structure learner (arcs free, not NB/TAN/KDB shaped).
model = spbn.HCClassifierSP(random_state=SEED, n_jobs=-1,
                            bn_score='holdout-lik', bw_sel='scott')
nombre_modelo = 'Unconstrained-BNC-SP'
# Tune the HC stopping threshold (log scale) and the max parents per node.
search_space = dict(
    epsilon=Real(1e-6, 1, prior='log-uniform', base=20),
    max_indegree=Integer(1, 3),
)
# Unconstrained-BNC-SP on group [1].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, data = model_demonstrate([1], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params epsilon, max_indegree Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8778172357077461 auc.
| Metric | Value |
|---|---|
| roc | 0.850887 |
| brier_loss | 0.155774 |
| specificity | 0.943089 |
| sensitivity | 0.427273 |
| accuracy | 0.783708 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0.5358557867652401 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 1 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('arcs', 'node_type') |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8778172357077461
# Unconstrained-BNC-SP on group [2].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([2], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params epsilon, max_indegree Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8354721548802896 auc.
| Metric | Value |
|---|---|
| roc | 0.858819 |
| brier_loss | 0.103887 |
| specificity | 0.956081 |
| sensitivity | 0.368421 |
| accuracy | 0.86119 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0.00023082848481274974 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 2 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('arcs', 'node_type') |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8354721548802896
# Unconstrained-BNC-SP on the pooled groups [3, 4, 5].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([3, 4, 5], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params epsilon, max_indegree Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits
The objective has been evaluated at this point before.
Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.8064127100682388 auc.
| Metric | Value |
|---|---|
| roc | 0.766236 |
| brier_loss | 0.0927182 |
| specificity | 0.976744 |
| sensitivity | 0.0740741 |
| accuracy | 0.876033 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0.9954296486778969 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 1 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('arcs', 'node_type') |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.8064127100682388
# Unconstrained-BNC-SP on group [6].
demo_opts = dict(scale=False, shap_explainer=None, model_repr=bn_repr,
                 nombre_modelo=nombre_modelo, n_iter=15, verbose=1)
report, _ = model_demonstrate([6], model, search_space, df_train, df_val, **demo_opts)
print(report['train_score'])
Tuning params epsilon, max_indegree Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Fitting 5 folds for each of 1 candidates, totalling 5 fits Tune result: 0.7364977629258331 auc.
| Metric | Value |
|---|---|
| roc | 0.727273 |
| brier_loss | 0.110971 |
| specificity | 0.948718 |
| sensitivity | 0.227273 |
| accuracy | 0.859551 |
| Hyperparameter | Value |
|---|---|
| bn_score | holdout-lik |
| bw_sel | scott |
| epsilon | 0.004745700905586502 |
| greedy_arc_remove | False |
| greedy_node_remove | False |
| greedy_prune_cv | 5 |
| greedy_prune_impatient | True |
| max_indegree | 3 |
| max_iters | 2147483647 |
| n_jobs | -1 |
| num_folds | 10 |
| operators | ('arcs', 'node_type') |
| patience | 0 |
| random_state | 12312548 |
| test_holdout_ratio | 0.2 |
| verbose | 0 |
0.7364977629258331
# Wave groupings ("olas") and the full roster of tuned models to compare.
GR_OLAS = [(1,), (2,), (3, 4, 5), (6,)]
models = [
    'LogisticRegression', 'DecisionTreeClassifier', 'GradientBoostingClassifier',
    'Naive-Bayes-CLG', 'Naive-Bayes-SP', 'Chow-Liu-TAN-CLG', 'Chow-Liu-TAN-SP',
    '2DB-BNC-CLG', '2DB-BNC-SP', 'Unconstrained-BNC-CLG', 'Unconstrained-BNC-SP'
]
# Load every persisted evaluation report, keyed by wave group then model name.
reports = {
    gr_ola: {m: load(generar_id(m, gr_ola)) for m in models}
    for gr_ola in GR_OLAS
}
def mod_brier(metrics):
    """Turn a Brier *loss* into a Brier *score* (1 - loss), in place.

    Mutates ``metrics``: removes the 'brier_loss' key and appends
    'brier_score'. Returns the same dict for chaining convenience.
    """
    metrics['brier_score'] = 1 - metrics.pop('brier_loss')
    return metrics
# Replace each model's Brier loss with a Brier score, per wave group.
metrics = {}
for gr_ola in GR_OLAS:
    metrics[gr_ola] = {m: mod_brier(reports[gr_ola][m]['metrics']) for m in models}

# Pivot into {(METRIC LABEL, 'W <waves>'): {model: value}} — the two-level
# column layout the summary DataFrame expects (metric outer, wave inner).
adapt_metrics = {}
for metric in ['roc', 'brier_score', 'sensitivity', 'specificity', 'accuracy']:
    header = metric.replace('_', ' ').upper()
    for gr_ola in GR_OLAS:
        wave = 'W ' + ','.join(str(o) for o in gr_ola)
        adapt_metrics[(header, wave)] = {
            model: metrics[gr_ola][model][metric] for model in models
        }
import matplotlib.cm as cm
# Build the summary table and style it in one fluent chain. Every Styler
# method both mutates the Styler in place and returns it, so chaining is
# equivalent to the sequence of standalone calls.
tabla = (
    pd.DataFrame(adapt_metrics)
    .style
    # Center all header cells.
    .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
    # Blue-white-red gradient per column so good/bad values pop out.
    .background_gradient(cmap=cm.bwr)
    .format(precision=2)
    .set_properties(**{
        'text-align': 'center',
        'border': '0.5px solid;'
    })
    # Header borders + background, merged with the styles already set.
    .set_table_styles([
        {'selector': 'th', 'props': 'border: 1px solid; background-color: whitesmoke;'}
    ], overwrite=False)
    # White separator after the last wave column of each metric group.
    .set_table_styles({
        ('ROC', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}],
        ('BRIER SCORE', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}],
        ('SPECIFICITY', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}],
        ('SENSITIVITY', 'W 6'): [{'selector': 'td', 'props': 'border-right: 1.5px solid white'}]
    }, overwrite=False, axis=0)
)
tabla
| ROC | BRIER SCORE | SENSITIVITY | SPECIFICITY | ACCURACY | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| W 1 | W 2 | W 3,4,5 | W 6 | W 1 | W 2 | W 3,4,5 | W 6 | W 1 | W 2 | W 3,4,5 | W 6 | W 1 | W 2 | W 3,4,5 | W 6 | W 1 | W 2 | W 3,4,5 | W 6 | |
| LogisticRegression | 0.87 | 0.88 | 0.82 | 0.83 | 0.87 | 0.91 | 0.83 | 0.90 | 0.60 | 0.35 | 0.74 | 0.14 | 0.90 | 0.97 | 0.73 | 0.96 | 0.81 | 0.87 | 0.73 | 0.86 |
| DecisionTreeClassifier | 0.80 | 0.80 | 0.80 | 0.78 | 0.83 | 0.83 | 0.81 | 0.81 | 0.72 | 0.89 | 1.00 | 0.73 | 0.82 | 0.57 | 0.51 | 0.79 | 0.79 | 0.63 | 0.56 | 0.79 |
| GradientBoostingClassifier | 0.88 | 0.88 | 0.89 | 0.82 | 0.88 | 0.91 | 0.93 | 0.90 | 0.61 | 0.28 | 0.15 | 0.09 | 0.92 | 0.98 | 0.98 | 0.99 | 0.82 | 0.86 | 0.89 | 0.88 |
| Naive-Bayes-CLG | 0.84 | 0.85 | 0.82 | 0.75 | 0.80 | 0.85 | 0.88 | 0.83 | 0.55 | 0.49 | 0.37 | 0.23 | 0.88 | 0.90 | 0.93 | 0.88 | 0.78 | 0.83 | 0.86 | 0.80 |
| Naive-Bayes-SP | 0.86 | 0.86 | 0.82 | 0.80 | 0.83 | 0.87 | 0.90 | 0.88 | 0.59 | 0.49 | 0.22 | 0.27 | 0.89 | 0.90 | 0.94 | 0.95 | 0.79 | 0.83 | 0.86 | 0.87 |
| Chow-Liu-TAN-CLG | 0.84 | 0.85 | 0.80 | 0.73 | 0.80 | 0.85 | 0.87 | 0.81 | 0.55 | 0.51 | 0.33 | 0.23 | 0.88 | 0.89 | 0.92 | 0.86 | 0.78 | 0.82 | 0.86 | 0.78 |
| Chow-Liu-TAN-SP | 0.86 | 0.84 | 0.78 | 0.74 | 0.83 | 0.84 | 0.89 | 0.85 | 0.56 | 0.46 | 0.22 | 0.23 | 0.90 | 0.89 | 0.96 | 0.92 | 0.80 | 0.82 | 0.88 | 0.83 |
| 2DB-BNC-CLG | 0.84 | 0.84 | 0.81 | 0.76 | 0.80 | 0.86 | 0.87 | 0.85 | 0.52 | 0.47 | 0.33 | 0.18 | 0.89 | 0.90 | 0.93 | 0.92 | 0.77 | 0.83 | 0.86 | 0.83 |
| 2DB-BNC-SP | 0.83 | 0.85 | 0.82 | 0.75 | 0.80 | 0.88 | 0.89 | 0.87 | 0.53 | 0.37 | 0.22 | 0.18 | 0.89 | 0.95 | 0.96 | 0.93 | 0.78 | 0.85 | 0.88 | 0.84 |
| Unconstrained-BNC-CLG | 0.84 | 0.82 | 0.75 | 0.66 | 0.81 | 0.84 | 0.87 | 0.84 | 0.58 | 0.40 | 0.26 | 0.09 | 0.87 | 0.90 | 0.94 | 0.92 | 0.78 | 0.82 | 0.86 | 0.82 |
| Unconstrained-BNC-SP | 0.85 | 0.86 | 0.77 | 0.73 | 0.84 | 0.90 | 0.91 | 0.89 | 0.43 | 0.37 | 0.07 | 0.23 | 0.94 | 0.96 | 0.98 | 0.95 | 0.78 | 0.86 | 0.88 | 0.86 |
# Final hold-out evaluation of the tuned gradient boosting model for wave 1,
# refit on train + validation and explained with SHAP tree explanations.
model = reports[(1,)]['GradientBoostingClassifier']['model']
train_full = pd.concat([df_train, df_val], axis=0)
report, data = model_demonstrate(
    [1], model, None, train_full, df_test,
    scale=False, shap_explainer=shap.TreeExplainer, model_repr=None,
    n_iter=0, nombre_modelo='GradientBoostingClassifier', es_test=True)
| Metric | Value |
|---|---|
| roc | 0.889103 |
| brier_loss | 0.123066 |
| specificity | 0.911765 |
| sensitivity | 0.622951 |
| accuracy | 0.822335 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| criterion | friedman_mse |
| init | |
| learning_rate | 0.05 |
| loss | log_loss |
| max_depth | 3 |
| max_features | log2 |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| n_estimators | 150 |
| n_iter_no_change | 10 |
| random_state | 12312548 |
| subsample | 1.0 |
| tol | 0.0001 |
| validation_fraction | 0.1 |
| verbose | 0 |
| warm_start | False |
# Final hold-out evaluation of the tuned gradient boosting model for wave 2.
model = reports[(2,)]['GradientBoostingClassifier']['model']
train_full = pd.concat([df_train, df_val], axis=0)
report, data = model_demonstrate(
    [2], model, None, train_full, df_test,
    scale=False, shap_explainer=shap.TreeExplainer, model_repr=None,
    n_iter=0, nombre_modelo='GradientBoostingClassifier', es_test=True)
| Metric | Value |
|---|---|
| roc | 0.881983 |
| brier_loss | 0.0889052 |
| specificity | 0.957317 |
| sensitivity | 0.483871 |
| accuracy | 0.882051 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| criterion | friedman_mse |
| init | |
| learning_rate | 0.18859591001753956 |
| loss | log_loss |
| max_depth | 3 |
| max_features | log2 |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| n_estimators | 64 |
| n_iter_no_change | 6 |
| random_state | 12312548 |
| subsample | 1.0 |
| tol | 0.0001 |
| validation_fraction | 0.1 |
| verbose | 0 |
| warm_start | False |
# Final hold-out evaluation of the tuned gradient boosting model for the
# pooled waves 3-5.
model = reports[(3,4,5,)]['GradientBoostingClassifier']['model']
train_full = pd.concat([df_train, df_val], axis=0)
report, data = model_demonstrate(
    [3,4,5], model, None, train_full, df_test,
    scale=False, shap_explainer=shap.TreeExplainer, model_repr=None,
    n_iter=0, nombre_modelo='GradientBoostingClassifier', es_test=True)
| Metric | Value |
|---|---|
| roc | 0.868347 |
| brier_loss | 0.0862825 |
| specificity | 0.966387 |
| sensitivity | 0.133333 |
| accuracy | 0.873134 |
| Hyperparameter | Value |
|---|---|
| ccp_alpha | 0.0 |
| criterion | friedman_mse |
| init | |
| learning_rate | 0.08410300395207498 |
| loss | log_loss |
| max_depth | 3 |
| max_features | log2 |
| max_leaf_nodes | |
| min_impurity_decrease | 0.0 |
| min_samples_leaf | 1 |
| min_samples_split | 2 |
| min_weight_fraction_leaf | 0.0 |
| n_estimators | 100 |
| n_iter_no_change | 8 |
| random_state | 12312548 |
| subsample | 1.0 |
| tol | 0.0001 |
| validation_fraction | 0.1 |
| verbose | 0 |
| warm_start | False |
# Final hold-out evaluation of the tuned logistic regression for wave 6;
# linear SHAP explainer and the coefficient-table representation.
model = reports[(6,)]['LogisticRegression']['model']
train_full = pd.concat([df_train, df_val], axis=0)
report, data = model_demonstrate(
    [6], model, None, train_full, df_test,
    shap_explainer=shap.LinearExplainer, model_repr=lr_repr,
    n_iter=0, nombre_modelo='LogisticRegression', es_test=True)
The max_iter was reached which means the coef_ did not converge
| Metric | Value |
|---|---|
| roc | 0.747126 |
| brier_loss | 0.105356 |
| specificity | 0.977011 |
| sensitivity | 0.0833333 |
| accuracy | 0.868687 |
| Feature | Weight |
|---|---|
| Age | 1.45282 |
| Heart rate (daily maximum) | 0.419777 |
| Systolic blood pressure (daily maximum) | -0.359161 |
| Mean corpuscular volume | 0.292682 |
| Systolic blood pressure (daily minimum) | 0.287555 |
| Hematocrit | -0.26281 |
| Systolic blood pressure (first measure) | -0.256907 |
| Glucose | 0.220738 |
| C-reactive protein | 0.197807 |
| Monocytes % | -0.173081 |
| Oxygen saturation (daily maximum) | -0.163244 |
| Hemolysis index | 0.161996 |
| Lactate dehydrogenase (LDH) | 0.160064 |
| Partial Thromboplastin Time ratio | 0.158866 |
| Oxygen saturation (daily minimum) | -0.149929 |
| Mean corpuscular hemoglobin concentration (MCHC) | -0.141669 |
| Hemoglobin | -0.141086 |
| Creatinine | 0.133404 |
| Segmented neutrophils % | 0.120926 |
| Oxygen saturation (first measure) | 0.0912027 |
| Urea | 0.068128 |
| Eosinophil % | -0.0461896 |
| Heart rate (first measure) | 0.0459791 |
| Derived fibrinogen | 0.0406181 |
| Prothrombin time (PT) | -0.0300428 |
| D-Dimer | -0.021472 |
| Activated Partial Thromboplastin Time (aPTT) | 0.0191442 |
| Lymphocyte % | -0.0172647 |
| Blood urea nitrogen (BUN) | -0.00126195 |
| Hyperparameter | Value |
|---|---|
| C | 0.6576313755130081 |
| class_weight | |
| dual | False |
| fit_intercept | True |
| intercept_scaling | 1 |
| l1_ratio | 0.27956404472462143 |
| max_iter | 100 |
| multi_class | auto |
| n_jobs | -1 |
| penalty | elasticnet |
| random_state | |
| solver | saga |
| tol | 0.0001 |
| verbose | 0 |
| warm_start | False |
bn_models = ['Naive-Bayes-CLG', 'Naive-Bayes-SP', 'Chow-Liu-TAN-CLG', 'Chow-Liu-TAN-SP',
             '2DB-BNC-CLG', '2DB-BNC-SP', 'Unconstrained-BNC-CLG', 'Unconstrained-BNC-SP']
# Export every Bayesian-network classifier's structure as an SVG.
for gr_olas in GR_OLAS:
    for bn_m in bn_models:
        bn_svg = reports[gr_olas][bn_m]['model'].as_pydot().create_svg().decode('utf-8')
        # BUG FIX: the original filename was f'.../{bn_m}.svg' — it did not
        # include the wave group, so each pass of the outer loop overwrote the
        # previous wave's files and only the (6,) SVGs survived.
        wave_tag = '-'.join(str(o) for o in gr_olas)
        with open(f'clasificadoresBayesianos/{bn_m}_w{wave_tag}.svg', 'w') as f:
            f.write(bn_svg)
def markov_blanket(bn_model, class_name=None):
    """Return the Markov blanket of `class_name` in a fitted BN classifier.

    The Markov blanket of a node is the union of its parents, its children,
    and the other parents of those children (spouses).

    Parameters
    ----------
    bn_model : fitted classifier exposing ``bn_.graph()`` with
        ``children(node)`` / ``parents(node)`` methods.
    class_name : str, optional
        Node whose blanket is computed. Defaults to ``translate["EXITUS"]``,
        looked up lazily so the lookup happens at call time, not when this
        function is defined.

    Returns
    -------
    list
        Node names forming the Markov blanket (order unspecified).
    """
    if class_name is None:
        class_name = translate["EXITUS"]
    graph = bn_model.bn_.graph()
    children = graph.children(class_name)
    spouses = set()
    for c in children:
        spouses |= set(graph.parents(c))
    # discard(), not remove(): remove() raised KeyError when the class node
    # had no children (empty spouse set).
    spouses.discard(class_name)
    # BUG FIX: the original omitted the class node's own parents; for the
    # unconstrained BNCs the class may have incoming arcs, and those parents
    # belong to the Markov blanket.
    mb = spouses.union(children).union(graph.parents(class_name))
    return list(mb)
# Print the mortality node's Markov blanket for each unconstrained BNC,
# per wave group — these are the features directly informative of the class.
for bn_m in ['Unconstrained-BNC-CLG', 'Unconstrained-BNC-SP']:
    for gr_olas in GR_OLAS:
        bn_model = reports[gr_olas][bn_m]['model']
        blanket = markov_blanket(bn_model)
        print(f'Model {bn_m}, wave/s {gr_olas} Markov blanket:')
        print(' - ' + '\n - '.join(blanket))
        print()
Model Unconstrained-BNC-CLG, wave/s (1,) Markov blanket: - Calcium - International normalized ratio (INR) - Oxygen saturation (daily maximum) - Blood urea nitrogen (BUN) - Partial pressure of oxygen (Blood gas test) - Glucose - Lactate dehydrogenase (LDH) - Heart rate (daily maximum) - Oxygen saturation (daily minimum) - Age - Red Cell Blood Distribution Width (RDW) Model Unconstrained-BNC-CLG, wave/s (2,) Markov blanket: - Urea - Hematocrit - International normalized ratio (INR) - Blood urea nitrogen (BUN) - Partial pressure of CO2 (Blood gas test) - Oxygen saturation (daily maximum) - Partial pressure of oxygen (Blood gas test) - Diastolic blood pressure (daily maximum) - Estimated glomerular filtration rate (eGFR) ckd-epi - D-Dimer - Lactate dehydrogenase (LDH) - Heart rate (daily maximum) - Age - Red Cell Blood Distribution Width (RDW) - Current bicarbonate (blood gas test) Model Unconstrained-BNC-CLG, wave/s (3, 4, 5) Markov blanket: - Lymphocyte count - Lymphocyte % - Albumin - Body temperaure (daily maximum) - Red Cell Blood Distribution Width (RDW) - Heart rate (daily minimum) - Current bicarbonate (blood gas test) - Blood urea nitrogen (BUN) - D-Dimer - Lactate dehydrogenase (LDH) - Heart rate (daily maximum) - Age Model Unconstrained-BNC-CLG, wave/s (6,) Markov blanket: - Blood urea nitrogen (BUN) - Oxygen saturation (daily maximum) - Glucose - Hemolysis index - Heart rate (first measure) Model Unconstrained-BNC-SP, wave/s (1,) Markov blanket: - Calcium - Oxygen saturation (daily maximum) - Prothrombin time (PT) - Oxygen saturation (daily minimum) - Age Model Unconstrained-BNC-SP, wave/s (2,) Markov blanket: - Urea - Partial pressure of CO2 (Blood gas test) - Estimated glomerular filtration rate (eGFR) ckd-epi - Albumin - Systolic blood pressure (daily maximum) - Oxygen saturation (daily minimum) - Total CO2 (blood gas test) - International normalized ratio (INR) - D-Dimer - Heart rate (daily maximum) - Age Model Unconstrained-BNC-SP, wave/s (3, 4, 5) 
Markov blanket: - Age - Oxygen saturation (daily minimum) - Current bicarbonate (blood gas test) Model Unconstrained-BNC-SP, wave/s (6,) Markov blanket: - Hemoglobin - Hematocrit - Oxygen saturation (daily maximum) - Systolic blood pressure (first measure) - Lactate dehydrogenase (LDH) - Systolic blood pressure (daily minimum) - Prothrombin time (PT) - Heart rate (daily maximum) - Systolic blood pressure (daily maximum) - Oxygen saturation (daily minimum) - Heart rate (first measure) - Mean corpuscular hemoglobin concentration (MCHC)